source("GAA-EVAL.R")
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Warning: package 'pROC' was built under R version 3.4.4
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: reshape2
## Warning: package 'reshape2' was built under R version 3.4.3
## Warning: package 'ggrepel' was built under R version 3.4.4
## Warning: package 'gmodels' was built under R version 3.4.4
##
## Attaching package: 'gmodels'
## The following object is masked from 'package:pROC':
##
## ci
exp.data <- read.RealData(file = "exp_data.txt", sep = "\t",
col.id = 1, col.value = 2, col.sd = 3)
head(exp.data$value)
## A10G A129V A131S A150T A150W A152D
## 3523 564 4051 1850 3325 451
sub.data <- read.Submission.Folder(folder.name = "prediction/",col.id = 1,
col.value = 2, col.sd = 3, real.data = exp.data)
## [1] "prediction//Group_39/Group_39-prediction_file-1.txt"
## [1] "prediction//Group_39/Group_39-prediction_file-2.txt"
## [1] "prediction//Group_40/Group_40-prediction_file-1.txt"
## [1] "prediction//Group_41/Group_41-prediction_file-1.txt"
## [1] "prediction//Group_41/Group_41-prediction_file-2.txt"
## [1] "prediction//Group_42/Group_42-prediction_file-1.txt"
## [1] "prediction//Group_42/Group_42-prediction_file-2.txt"
## [1] "prediction//Group_43/Group_43-prediction_file-1.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-1.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-2.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-3.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-4.txt"
## [1] "prediction//Group_45/Group_45-prediction_file-1.txt"
## [1] "prediction//Group_46/Group_46-prediction_file-1-late.txt"
## [1] "prediction//Group_47/Group_47-prediction_file-1.txt"
## [1] "prediction//Group_47/Group_47-prediction_file-2.txt"
sub.data = addGroup(sub.data,c(1,1,2,3,3,4,4,5,6,6,6,6,7,8,9,9))
plot_all_scatter(real.data = exp.data, pred.data = sub.data, z.transform = TRUE)
## Warning: Removed 4890 rows containing missing values (geom_point).
## Warning: Removed 4890 rows containing missing values (geom_point).
## Warning: Removed 4890 rows containing missing values (geom_point).
## Warning: Removed 4427 rows containing missing values (geom_point).
# 1. Render coefficient value
result.cor.pearson <- eval.Correlation(real.data = exp.data, pred.data = sub.data,
method = "pearson", sd.use = NA,z.transform = TRUE)
head(result.cor.pearson)
## pearson.coefficient.n=5109 p.value
## Group_39-prediction_file-1 0.2205440 2.551258e-57
## Group_39-prediction_file-2 0.2142851 3.864431e-54
## Group_40-prediction_file-1 0.3118278 2.525095e-06
## Group_41-prediction_file-1 0.2138742 6.199527e-54
## Group_41-prediction_file-2 0.2220716 4.124447e-58
## Group_42-prediction_file-1 0.1532161 2.334007e-02
# 2. Plot Correlation
plot.Correlation(result.cor.pearson, "Pearson")
result.rmsd <- eval.RMSD(real.data = exp.data, pred.data = sub.data,sd.use = NA,
density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = TRUE)
head(result.rmsd)
## RMSD
## Group_39-prediction_file-1 0.0017074087
## Group_39-prediction_file-2 0.0017074330
## Group_40-prediction_file-1 0.0003547050
## Group_41-prediction_file-1 0.0017073238
## Group_41-prediction_file-2 0.0017073365
## Group_42-prediction_file-1 0.0003547284
plot.RMSD(result.rmsd, method="")
# with variance.normalization
result.rmsd <- eval.RMSD(real.data = exp.data, pred.data = sub.data,sd.use = NA,
density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = FALSE)
head(result.rmsd)
## RMSD
## Group_39-prediction_file-1 3450.4299
## Group_39-prediction_file-2 3450.4789
## Group_40-prediction_file-1 716.8082
## Group_41-prediction_file-1 3450.2580
## Group_41-prediction_file-2 3450.2837
## Group_42-prediction_file-1 716.8556
plot.RMSD(result.rmsd, method="")
result.auc.0.4 <- eval.AUC(real.data = exp.data, pred.data = sub.data,
threshold = 0.4,z.transform = T)
## [1] "Group_39-prediction_file-1"
## [1] "Group_39-prediction_file-2"
## [1] "Group_40-prediction_file-1"
## [1] "Group_41-prediction_file-1"
## [1] "Group_41-prediction_file-2"
## [1] "Group_42-prediction_file-1"
## [1] "Group_42-prediction_file-2"
## [1] "Group_43-prediction_file-1"
## [1] "Group_44-prediction_file-1"
## [1] "Group_44-prediction_file-2"
## [1] "Group_44-prediction_file-3"
## [1] "Group_44-prediction_file-4"
## [1] "Group_45-prediction_file-1"
## [1] "Group_46-prediction_file-1-late"
## [1] "Group_47-prediction_file-1"
## [1] "Group_47-prediction_file-2"
head(result.auc.0.4$results)
## AUC.0.4 sensitivity specificity accuracy
## Group_39-prediction_file-1 0.6206 0.3318 0.8432 0.6408
## Group_39-prediction_file-2 0.6070 0.2982 0.8827 0.6514
## Group_40-prediction_file-1 0.7330 0.3840 0.8617 0.5890
## Group_41-prediction_file-1 0.6370 0.5084 0.6955 0.6215
## Group_41-prediction_file-2 0.6427 0.5114 0.6945 0.6220
## Group_42-prediction_file-1 0.6091 0.3680 0.7447 0.5297
## precision f1_score bAccu
## Group_39-prediction_file-1 0.5810 0.4224 0.5875
## Group_39-prediction_file-2 0.6249 0.4037 0.5905
## Group_40-prediction_file-1 0.7869 0.5161 0.6229
## Group_41-prediction_file-1 0.5224 0.5153 0.6020
## Group_41-prediction_file-2 0.5230 0.5171 0.6030
## Group_42-prediction_file-1 0.6571 0.4718 0.5563
plot.AUC(result.auc.0.4)
# for all the submission files
result.bM.spearman <-eval.Correlation.Between(real.data = exp.data, pred.data = sub.data,
method = "spearman",sd.use = NA,z.transform = TRUE)
plot.Correlation.Between(result.bM.spearman$coefficient, method="Spearman")
# for best submission of each group
result.bM.spearman <-eval.Correlation.Between(real.data = exp.data, pred.data = sub.data,
method = "pearson",sd.use = NA,z.transform = TRUE,grouped = TRUE)
## Warning: package 'bindrcpp' was built under R version 3.4.4
plot.Correlation.Between(result.bM.spearman$coefficient, method="pearson")
result.pCor <- eval.Partial.Correlation(real.data = exp.data, pred.data = sub.data, method = "spearman")
## Loading required package: ppcor
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.4.4
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Warning in pcor(mat, method = "spearman", ...): The inverse of variance-
## covariance matrix is calculated using Moore-Penrose generalized matrix
## invers due to its determinant of zero.
## Warning in sqrt((n - 2 - gp)/(1 - pcor^2)): NaNs produced
plot.Correlation.Between(result.bM.spearman$coefficient, method="Spearman")
total = cbind(real = exp.data$value,sub.data$value)
Plot.PCA(na.omit(total), labels=F, legend=TRUE)
# uniqueness as adj.r^2 difference between total linear model and linear models without certain group
result.uniq = eval.uniqueness(real.data = exp.data, pred.data = sub.data)
result.uniq
## uniqueness
## 1 -0.0033968380
## 2 -0.0022404747
## 3 0.0190299731
## 4 -0.0024418645
## 5 0.0253771290
## 6 0.0082466666
## 7 -0.0028653213
## 8 -0.0004954721
## 9 -0.0013843547
plot.uniqueness(result.uniq, method="")
For Correlation-based Evaluation, provide mean, CI, and median pval
# 1. Render coefficient value
boot.result.cor.pearson <- eval.Correlation(real.data = exp.data, pred.data = sub.data,
method = "pearson", sd.use = NA,z.transform = TRUE,boot = T)
head(boot.result.cor.pearson)
## avg low_ci high_ci sd
## Group_39-prediction_file-1 0.2207798 0.20120188 0.2412131 0.01218407
## Group_39-prediction_file-2 0.2146817 0.19630053 0.2346390 0.01185537
## Group_40-prediction_file-1 0.3088733 0.20984235 0.3985163 0.05834748
## Group_41-prediction_file-1 0.2141488 0.19221336 0.2342647 0.01326737
## Group_41-prediction_file-2 0.2222340 0.20093036 0.2428381 0.01306769
## Group_42-prediction_file-1 0.1544749 0.04063509 0.2615746 0.06693900
## p.value
## Group_39-prediction_file-1 1.996242e-57
## Group_39-prediction_file-2 2.944147e-54
## Group_40-prediction_file-1 2.441409e-06
## Group_41-prediction_file-1 2.164946e-54
## Group_41-prediction_file-2 1.403092e-58
## Group_42-prediction_file-1 2.142895e-02
# 2. Plot Correlation
plot.Correlation(boot.result.cor.pearson, "Pearson",boot = TRUE)
For RMSD-based Evaluation
boot.result.rmsd <- eval.RMSD(real.data = exp.data, pred.data = sub.data,sd.use = NA,
density.distance = F, density.distance.adjust = F, variance.normalization = T,boot = TRUE)
head(boot.result.rmsd)
## RMSD low_ci high_ci
## Group_39-prediction_file-1 0.0014325208 0.0014046682 0.0014635107
## Group_39-prediction_file-2 0.0014325540 0.0014047012 0.0014635450
## Group_40-prediction_file-1 0.0003260579 0.0003021397 0.0003487363
## Group_41-prediction_file-1 0.0014323977 0.0014045511 0.0014633831
## Group_41-prediction_file-2 0.0014324163 0.0014045686 0.0014634023
## Group_42-prediction_file-1 0.0003260854 0.0003021686 0.0003487574
## sd
## Group_39-prediction_file-1 1.752230e-05
## Group_39-prediction_file-2 1.752269e-05
## Group_40-prediction_file-1 1.370471e-05
## Group_41-prediction_file-1 1.752028e-05
## Group_41-prediction_file-2 1.752052e-05
## Group_42-prediction_file-1 1.370550e-05
plot.RMSD(boot.result.rmsd, method="",boot = TRUE)
For Uniqueness Evaluation
result.uniq = eval.uniqueness(real.data = exp.data, pred.data = sub.data,boot = TRUE)
result.uniq
## uniqueness low_ci high_ci sd
## 1 -0.0005991351 -0.003638222 0.008117041 0.004208176
## 2 0.0014178271 -0.003470437 0.013948044 0.006618052
## 3 0.0222532819 0.001349078 0.057484825 0.017401726
## 4 0.0010291336 -0.003565168 0.012440378 0.005844097
## 5 0.0257885881 0.001552415 0.060673355 0.017557189
## 6 0.0096609177 -0.002808184 0.032872572 0.010947511
## 7 -0.0006321320 -0.003579500 0.007359578 0.003745998
## 8 0.0024353157 -0.003410292 0.016190723 0.006748525
## 9 0.0019408102 -0.003631670 0.016038266 0.006610507
plot.uniqueness(result.uniq, method="",boot = TRUE)